<?php
###############   COPYLEFT GPLv3 LICENSE   ###############
##
## Copyright 2009 GPLv3 - http://www.opensource.org/licenses/gpl-3.0.html
##
## Anthony Gallon
## oi_antz@hotmail.com
##
## Permission is hereby granted to any person having a copy of this software
## to freely use and modify as required so long as the copyright notices
## and branding remain intact.
##
###############   COPYLEFT GPLv3 LICENSE   ###############

if(false === class_exists('phpQuery')) exit('Antz_TagFilter requires class phpQuery - see '.__FILE__.', line '.__LINE__); // phpQuery is a hard dependency of Antz_TagFilter

/**
 * Strips unwanted and malicious tags from html content with whitelist and blacklist approach.
 * Supports whitelist tagnames, attributes and explicit tag/attribute combinations.
 *
 * Tagname and attribute lists are flat lists of trimmed strings.
 * Explicit rules are stored as a list of single-pair arrays, array(tagname=>attname).
 * When rules are read, a flat tagname=>attname map and a tagname=>array(attname, ...)
 * map are accepted as well (see normalizeExplicitRules()), because setConfig() merges
 * configuration arrays into the rule properties without reshaping them.
 */
class Antz_TagFilter
{
    protected $attributeWhitelist = array();
    protected $attributeBlacklist = array();
    protected $tagnameWhitelist = array();
    protected $tagnameBlacklist = array();
    protected $explicitWhitelist = array();
    protected $explicitBlacklist = array();
    protected $htmlMode = 'xhtml';
    protected $errors = array();
    /** @deprecated never read by this class; kept only so existing subclasses do not break */
    protected $removeNodes = array();
    /** Elements collected by processElement() that process() removes from the document */
    protected $removedNodes = array();
    protected $allowDoctype = false;
    protected $config = null;

    public function __construct(){

    }

    /**
     * Returns the configuration object passed to setConfig(), or null if none was set
     * @return Antz_TagFilter_Config|null
     */
    public function getConfig(){
        return $this->config;
    }

    /**
     * Stores the configuration object and applies its values to the matching properties.
     * Array values are merged into array properties; string/bool/null values overwrite
     * non-array properties; htmlMode goes through setHtmlMode() so it is validated.
     * Keys that do not name a declared property, and internal state, are ignored.
     * @param Antz_TagFilter_Config $config
     */
    public function setConfig(Antz_TagFilter_Config $config){
        $this->config = $config;

        $settings = $config->get();
        if(!is_array($settings) && !($settings instanceof Traversable)) return;

        // internal state that must never be overwritten from configuration
        $reserved = array('config', 'errors', 'removeNodes', 'removedNodes');

        foreach($settings as $propertyName=>$propertyValue){
            if(!is_string($propertyName) || in_array($propertyName, $reserved, true)) continue;
            if(!property_exists($this, $propertyName)) continue;

            if($propertyName === 'htmlMode'){
                if(is_string($propertyValue)) $this->setHtmlMode($propertyValue);
            }else if(is_array($this->$propertyName)){
                // a scalar must never replace a list property
                if(is_array($propertyValue)){
                    $this->$propertyName = array_merge($this->$propertyName, $propertyValue);
                }
            }else if(is_string($propertyValue) || is_bool($propertyValue) || is_null($propertyValue)){
                $this->$propertyName = $propertyValue;
            }
        }
    }

    /**
     * Returns the error messages recorded so far
     * @return array
     */
    public function getErrors(){
        return $this->errors;
    }

    /**
     * Set the mode which phpQuery runs (XHTML or HTML).
     * Any other value is ignored and the current mode is kept.
     * @param string $mode
     */
    public function setHtmlMode($mode='xhtml'){
        $mode = strtolower((string) $mode);
        if($mode === 'xhtml' || $mode === 'html') $this->htmlMode = $mode;
    }

    /**
     * Overwrite attributes whitelist with new values (ignored unless an array is given)
     * @param mixed $atts
     */
    public function setAttributeWhitelist($atts){
        if(!is_array($atts)) return;
        $this->attributeWhitelist = array();
        $this->addAttributeWhitelist($atts);
    }

    /**
     * Overwrite attributes blacklist with new values (ignored unless an array is given)
     * @param mixed $atts
     */
    public function setAttributeBlacklist($atts){
        if(!is_array($atts)) return;
        $this->attributeBlacklist = array();
        $this->addAttributeBlacklist($atts);
    }

    /**
     * Overwrite tagname whitelist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setTagnameWhitelist($tags){
        if(!is_array($tags)) return;
        $this->tagnameWhitelist = array();
        $this->addTagnameWhitelist($tags);
    }

    /**
     * Overwrite tagname blacklist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setTagnameBlacklist($tags){
        if(!is_array($tags)) return;
        $this->tagnameBlacklist = array();
        $this->addTagnameBlacklist($tags);
    }

    /**
     * Overwrite explicit blacklist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setExplicitBlacklist($tags){
        if(!is_array($tags)) return;
        $this->explicitBlacklist = array();
        $this->addExplicitBlacklist($tags);
    }

    /**
     * Overwrite explicit whitelist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setExplicitWhitelist($tags){
        if(!is_array($tags)) return;
        $this->explicitWhitelist = array();
        $this->addExplicitWhitelist($tags);
    }

    /**
     * Add explicit blacklist rules. Accepted shapes:
     *   array(tagname=>attname, ...), array(tagname=>array(attname, ...)),
     *   or a list of such arrays.
     * @param mixed $tags
     */
    public function addExplicitBlacklist($tags){
        if(!is_array($tags)) return;
        foreach($this->normalizeExplicitRules($tags) as $rule){
            $this->explicitBlacklist[] = $rule;
        }
    }

    /**
     * Add explicit whitelist rules. Accepted shapes:
     *   array(tagname=>attname, ...), array(tagname=>array(attname, ...)),
     *   or a list of such arrays.
     * @param mixed $tags
     */
    public function addExplicitWhitelist($tags){
        if(!is_array($tags)) return;
        foreach($this->normalizeExplicitRules($tags) as $rule){
            $this->explicitWhitelist[] = $rule;
        }
    }

    /**
     * Add a tagname blacklist rule (a single name or an array of names)
     * @param mixed $tagname
     */
    public function addTagnameBlacklist($tagname){
        $this->addToList('tagnameBlacklist', $tagname);
    }

    /**
     * Add a tagname whitelist rule (a single name or an array of names)
     * @param mixed $tagname
     */
    public function addTagnameWhitelist($tagname){
        $this->addToList('tagnameWhitelist', $tagname);
    }

    /**
     * Add an attribute blacklist rule (a single name or an array of names)
     * @param mixed $att
     */
    public function addAttributeBlacklist($att){
        $this->addToList('attributeBlacklist', $att);
    }

    /**
     * Add an attribute whitelist rule (a single name or an array of names)
     * @param mixed $att
     */
    public function addAttributeWhitelist($att){
        $this->addToList('attributeWhitelist', $att);
    }

    /**
     * Remove a tagname blacklist rule (a single name or an array of names)
     * @param mixed $tagname
     */
    public function removeTagnameBlacklist($tagname){
        $this->removeFromList('tagnameBlacklist', $tagname);
    }

    /**
     * Remove a tagname whitelist rule (a single name or an array of names)
     * @param mixed $tagname
     */
    public function removeTagnameWhitelist($tagname){
        $this->removeFromList('tagnameWhitelist', $tagname);
    }

    /**
     * Remove an attribute blacklist rule (a single name or an array of names)
     * @param mixed $att
     */
    public function removeAttributeBlacklist($att){
        $this->removeFromList('attributeBlacklist', $att);
    }

    /**
     * Remove an attribute whitelist rule (a single name or an array of names)
     * @param mixed $att
     */
    public function removeAttributeWhitelist($att){
        $this->removeFromList('attributeWhitelist', $att);
    }


    /**
     * Sanitizes and returns supplied HTML with all blacklisted and non-whitelisted tags/attributes removed.
     * An array is processed item by item and the results are concatenated.
     * Returns an empty string for empty input, and also when the document could not be
     * created (the reason is then available through getErrors()).
     * @param mixed $content string, or array of strings
     * @return string $content
     */
    public function process($content){
        if(is_array($content)){
            foreach($content as $k=>$v){
                $content[$k] = $this->process($v);
            }
            return implode('', $content);
        }

        $this->removedNodes = array();
        $content = trim((string) $content);
        if($content === '') return '';

        // first pass: textual removal of blacklisted tags, before the markup is parsed
        foreach($this->tagnameBlacklist as $tagname){
            if(!is_string($tagname) || $tagname === '') continue;
            $tag = preg_quote($tagname, '#');
            $patterns = array(
                // paired tag with its content; non-greedy so markup between two separate occurrences survives
                '#<'.$tag.'(?=[\s/>])[^>]*>.*?</'.$tag.'(?=[\s/>])[^>]*>#is',
                // any opening tag left over (unclosed or self-closing)
                '#<'.$tag.'(?=[\s/>])[^>]*>#i',
            );
            foreach($patterns as $pattern){
                $stripped = preg_replace($pattern, '', $content);
                // null signals a PCRE failure; keep the text and let the DOM pass below handle the tag
                if($stripped !== null) $content = $stripped;
            }
        }

        $dom = $this->initDom($content);
        if(!is_object($dom)){
            // initDom() recorded the error; never hand back unfiltered markup
            return '';
        }

        foreach($dom->elements as $el){
            $this->processElement($el);
        }

        $whitelistRules = $this->normalizeExplicitRules($this->explicitWhitelist);
        foreach($this->normalizeExplicitRules($this->explicitBlacklist) as $rule){
            foreach($rule as $tagname=>$attribute){
                if(!is_string($tagname) || $tagname === '' || !is_string($attribute) || $attribute === '') continue;
                if($this->isExplicitlyWhitelisted($tagname, $attribute, $whitelistRules)){
                    // explicit allowed overrides explicit deny
                    continue;
                }
                foreach(pq($tagname.'['.$attribute.']') as $node){
                    if($node instanceof DOMElement) $node->removeAttribute($attribute);
                }
            }
        }

        foreach($this->removedNodes as $obj){
            pq($obj)->remove();
        }

        return (string) $dom;
    }


    /**
     * Creates a new phpQuery document for the configured html mode.
     * On an unknown mode an error is recorded and $content is returned unchanged.
     * @param string $content
     * @return mixed result of phpQuery::newDocumentXhtml()/newDocumentHtml(), or the $content string on error
     */
    protected function initDom($content){
        switch($this->htmlMode){
            case 'xhtml':
                $dom = phpQuery::newDocumentXhtml($content);
            break;
            case 'html':
                $dom = phpQuery::newDocumentHtml($content);
            break;
            default:
                $this->errors[] = 'Invalid mode: should be xhtml or html';
                return $content;
        }
        return $dom;
    }

    /**
     * Removes blacklisted and non-whitelisted attributes from the element, recurses into all
     * child nodes, and queues the element itself for removal when its tag is not allowed.
     * Anything that is not a DOMElement or DOMDocument is ignored.
     * @param DOMElement|DOMDocument $el
     */
    protected function processElement(&$el){

        if(false === ($el instanceof DOMElement) && false === ($el instanceof DOMDocument)){
            return;
        }

        $whitelistRules = $this->normalizeExplicitRules($this->explicitWhitelist);

        // collect first, remove afterwards: the attribute map must not change while it is iterated
        $invalidAtts = array();
        if($el->attributes !== null){
            foreach($el->attributes as $att){
                if(in_array($att->name, $this->attributeBlacklist, true)){
                    $invalidAtts[] = $att->name;
                }else if(!in_array($att->name, $this->attributeWhitelist, true)
                    && !$this->isExplicitlyWhitelisted($el->nodeName, $att->name, $whitelistRules)){
                    $invalidAtts[] = $att->name;
                }
            }
        }
        foreach($invalidAtts as $attName){
            $el->removeAttribute($attName);
        }

        $childNodes = $el->childNodes;
        if(is_object($childNodes)){
            for($i=0, $max=$childNodes->length; $i<$max; $i++){
                // processElement() takes its argument by reference, so it needs a real variable
                $child = $childNodes->item($i);
                $this->processElement($child);
            }
        }

        if($el instanceof DOMDocument) return;

        // a tag that has any explicit whitelist rule is always kept
        if($this->hasExplicitWhitelistFor($el->nodeName, $whitelistRules)) return;

        if(in_array($el->nodeName, $this->tagnameBlacklist, true) || !in_array($el->nodeName, $this->tagnameWhitelist, true)){
            $this->removedNodes[] = $el;
        }
    }

    /**
     * Adds one trimmed name, or every name of a (nested) array, to the named list property.
     * Empty names and names already present are skipped.
     * @param string $listName name of one of the tagname/attribute list properties
     * @param mixed $value
     */
    protected function addToList($listName, $value){
        if(is_array($value)){
            foreach($value as $item){
                $this->addToList($listName, $item);
            }
            return;
        }
        $value = trim((string) $value);
        if($value === '') return;
        if(!in_array($value, $this->$listName, true)) $this->{$listName}[] = $value;
    }

    /**
     * Removes one trimmed name, or every name of a (nested) array, from the named list property
     * and re-indexes the list.
     * @param string $listName name of one of the tagname/attribute list properties
     * @param mixed $value
     */
    protected function removeFromList($listName, $value){
        if(is_array($value)){
            foreach($value as $item){
                $this->removeFromList($listName, $item);
            }
            return;
        }
        $value = trim((string) $value);
        $list = $this->$listName;
        foreach(array_keys($list, $value, true) as $key){
            unset($list[$key]);
        }
        $this->$listName = array_values($list);
    }

    /**
     * Flattens explicit rules into a list of single-pair arrays array(tagname=>attname).
     * Handles tagname=>attname pairs, tagname=>array(attname, ...) and lists of either.
     * @param array $rules
     * @param string|null $parentTag tag name that integer-keyed values of $rules belong to
     * @return array
     */
    protected function normalizeExplicitRules(array $rules, $parentTag = null){
        $normalized = array();
        foreach($rules as $key=>$value){
            if(is_array($value)){
                // a string key names the tag its attribute list belongs to; an integer key is only grouping
                $nested = $this->normalizeExplicitRules($value, is_string($key) ? $key : null);
                $normalized = array_merge($normalized, $nested);
            }else if(is_int($key) && $parentTag !== null){
                $normalized[] = array($parentTag=>$value);
            }else{
                $normalized[] = array($key=>$value);
            }
        }
        return $normalized;
    }

    /**
     * Tells whether the tag/attribute combination is part of the given normalized rules
     * @param string $tagname
     * @param string $attname
     * @param array $rules result of normalizeExplicitRules()
     * @return bool
     */
    protected function isExplicitlyWhitelisted($tagname, $attname, array $rules){
        return in_array(array($tagname=>$attname), $rules, true);
    }

    /**
     * Tells whether the given normalized rules contain at least one rule for the tag
     * @param string $tagname
     * @param array $rules result of normalizeExplicitRules()
     * @return bool
     */
    protected function hasExplicitWhitelistFor($tagname, array $rules){
        foreach($rules as $rule){
            if(array_key_exists($tagname, $rule)) return true;
        }
        return false;
    }

}
